{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0b523e0a",
   "metadata": {},
   "source": [
    "# Lesson 4: Building a Multi-Document Agent"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a323703",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9625ab2-71b6-4fd0-904e-42df80d3215f",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from helper import get_openai_api_key\n",
    "OPENAI_API_KEY = get_openai_api_key()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3221a474-5817-4db2-af46-e029042a75a5",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "20adaa26",
   "metadata": {},
   "source": [
    "## 1. Setup an agent over 3 papers"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "48b71ff6",
   "metadata": {},
   "source": [
    "**Note**: The pdf files are included with this lesson. To access these papers, go to the `File` menu and select`Open...`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed10a24b-d65c-4b98-a93a-94ccdb8900d0",
   "metadata": {
    "height": 199,
    "tags": []
   },
   "outputs": [],
   "source": [
    "urls = [\n",
    "    \"https://openreview.net/pdf?id=VtmBAGCN7o\",\n",
    "    \"https://openreview.net/pdf?id=6PmJoRfdaK\",\n",
    "    \"https://openreview.net/pdf?id=hSyW5go0v8\",\n",
    "]\n",
    "\n",
    "papers = [\n",
    "    \"metagpt.pdf\",\n",
    "    \"longlora.pdf\",\n",
    "    \"selfrag.pdf\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d8f3185-3221-4b00-bd38-41d36e4a3307",
   "metadata": {
    "height": 165,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import get_doc_tools\n",
    "from pathlib import Path\n",
    "\n",
    "paper_to_tools_dict = {}\n",
    "for paper in papers:\n",
    "    print(f\"Getting tools for paper: {paper}\")\n",
    "    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)\n",
    "    paper_to_tools_dict[paper] = [vector_tool, summary_tool]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e541bdd-14e1-41b6-81b5-b1bfda078d07",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "initial_tools = [t for paper in papers for t in paper_to_tools_dict[paper]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bff58c52",
   "metadata": {
    "height": 63
   },
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f2c6a9f",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "len(initial_tools)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a124a438-5609-402e-8642-69d1088cb9ad",
   "metadata": {
    "height": 165,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core.agent import FunctionCallingAgentWorker\n",
    "from llama_index.core.agent import AgentRunner\n",
    "\n",
    "agent_worker = FunctionCallingAgentWorker.from_tools(\n",
    "    initial_tools, \n",
    "    llm=llm, \n",
    "    verbose=True\n",
    ")\n",
    "agent = AgentRunner(agent_worker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17409d4c-05a9-4bf4-b74f-75135fa3cb6b",
   "metadata": {
    "height": 80,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = agent.query(\n",
    "    \"Tell me about the evaluation dataset used in LongLoRA, \"\n",
    "    \"and then tell me about the evaluation results\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ace340b1-761f-4058-be41-68cf131541e4",
   "metadata": {
    "height": 63,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = agent.query(\"Give me a summary of both Self-RAG and LongLoRA\")\n",
    "print(str(response))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7eede70c",
   "metadata": {},
   "source": [
    "## 2. Setup an agent over 11 papers"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18771e69",
   "metadata": {},
   "source": [
    "### Download 11 ICLR papers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60d01d2c-547f-4054-b0fe-ed9b1a9cc3b5",
   "metadata": {
    "height": 471,
    "tags": []
   },
   "outputs": [],
   "source": [
    "urls = [\n",
    "    \"https://openreview.net/pdf?id=VtmBAGCN7o\",\n",
    "    \"https://openreview.net/pdf?id=6PmJoRfdaK\",\n",
    "    \"https://openreview.net/pdf?id=LzPWWPAdY4\",\n",
    "    \"https://openreview.net/pdf?id=VTF8yNQM66\",\n",
    "    \"https://openreview.net/pdf?id=hSyW5go0v8\",\n",
    "    \"https://openreview.net/pdf?id=9WD9KwssyT\",\n",
    "    \"https://openreview.net/pdf?id=yV6fD7LYkF\",\n",
    "    \"https://openreview.net/pdf?id=hnrB5YHoYu\",\n",
    "    \"https://openreview.net/pdf?id=WbWtOYIzIK\",\n",
    "    \"https://openreview.net/pdf?id=c5pwL0Soay\",\n",
    "    \"https://openreview.net/pdf?id=TpD2aG1h0D\"\n",
    "]\n",
    "\n",
    "papers = [\n",
    "    \"metagpt.pdf\",\n",
    "    \"longlora.pdf\",\n",
    "    \"loftq.pdf\",\n",
    "    \"swebench.pdf\",\n",
    "    \"selfrag.pdf\",\n",
    "    \"zipformer.pdf\",\n",
    "    \"values.pdf\",\n",
    "    \"finetune_fair_diffusion.pdf\",\n",
    "    \"knowledge_card.pdf\",\n",
    "    \"metra.pdf\",\n",
    "    \"vr_mcl.pdf\"\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b77426cb",
   "metadata": {
    "tags": []
   },
   "source": [
    "To download these papers, below is the needed code:\n",
    "\n",
    "\n",
    "    #for url, paper in zip(urls, papers):\n",
    "         #!wget \"{url}\" -O \"{paper}\"\n",
    "    \n",
    "    \n",
    "**Note**: The pdf files are included with this lesson. To access these papers, go to the `File` menu and select`Open...`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea5ee34d-02ac-4537-ae20-7ef6c5767172",
   "metadata": {
    "height": 165,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import get_doc_tools\n",
    "from pathlib import Path\n",
    "\n",
    "paper_to_tools_dict = {}\n",
    "for paper in papers:\n",
    "    print(f\"Getting tools for paper: {paper}\")\n",
    "    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)\n",
    "    paper_to_tools_dict[paper] = [vector_tool, summary_tool]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e35d52c",
   "metadata": {},
   "source": [
    "### Extend the Agent with Tool Retrieval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20154923-873e-4941-9a3a-4926ab5f9b8c",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "all_tools = [t for paper in papers for t in paper_to_tools_dict[paper]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "671582f9-70d7-4a8f-b813-58b2a068ca72",
   "metadata": {
    "height": 148,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# define an \"object\" index and retriever over these tools\n",
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.core.objects import ObjectIndex\n",
    "\n",
    "obj_index = ObjectIndex.from_objects(\n",
    "    all_tools,\n",
    "    index_cls=VectorStoreIndex,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3929882-e9dc-46ca-b495-53e3ed60340e",
   "metadata": {
    "height": 29,
    "tags": []
   },
   "outputs": [],
   "source": [
    "obj_retriever = obj_index.as_retriever(similarity_top_k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba9cfecd-fe14-4da8-b9ba-b3d485d98a03",
   "metadata": {
    "height": 80,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tools = obj_retriever.retrieve(\n",
    "    \"Tell me about the eval dataset used in MetaGPT and SWE-Bench\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c270ffbf-69c7-48ea-a028-9ba25221cde5",
   "metadata": {
    "height": 29,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tools[2].metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cc0a0b6-9858-4348-9ae0-1cd4160f3fb7",
   "metadata": {
    "height": 267,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core.agent import FunctionCallingAgentWorker\n",
    "from llama_index.core.agent import AgentRunner\n",
    "\n",
    "agent_worker = FunctionCallingAgentWorker.from_tools(\n",
    "    tool_retriever=obj_retriever,\n",
    "    llm=llm, \n",
    "    system_prompt=\"\"\" \\\n",
    "You are an agent designed to answer queries over a set of given papers.\n",
    "Please always use the tools provided to answer a question. Do not rely on prior knowledge.\\\n",
    "\n",
    "\"\"\",\n",
    "    verbose=True\n",
    ")\n",
    "agent = AgentRunner(agent_worker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a250cf1a-e011-4994-bcca-4e0294f20864",
   "metadata": {
    "height": 97,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = agent.query(\n",
    "    \"Tell me about the evaluation dataset used \"\n",
    "    \"in MetaGPT and compare it against SWE-Bench\"\n",
    ")\n",
    "print(str(response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8084c8cb-98ed-4835-aaa4-5b0c7254be6d",
   "metadata": {
    "height": 80,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = agent.query(\n",
    "    \"Compare and contrast the LoRA papers (LongLoRA, LoftQ). \"\n",
    "    \"Analyze the approach in each paper first. \"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
